home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
MacFormat 1999 Spring
/
macformat-077.iso
/
Shareware Plus
/
Development
/
SpriteWorld 2.2
/
SpriteWorld files
/
Sources
/
BlitPixieDoubleRect.c
< prev
next >
Encoding:
Amiga
Atari
Commodore
DOS
FM Towns/JPY
Macintosh
Macintosh JP
NeXTSTEP
RISC OS/Acorn
UTF-8
Wrap
Text File
|
1999-01-14
|
31.4 KB
|
1,313 lines
|
[
TEXT/CWIE
]
//--------------------------------------------------------------
// BlitPixieDoubleRect, v 4.4
// by Anders Björklund, May 1998
//
// This file contains the DrawProcs for use with the
// SWSetSpriteWorldDoubleRectDrawProc function in Scrolling.c.
//--------------------------------------------------------------
#include <SWIncludes.h>
//#define THEORY // uncomment this to use the "theoretical" version
//#define USE_C // uncomment this to use the C version of the blitter
// Byte distances the blitter has to skip when it hops between the two rects
// that make up one row. The fields are four longs in this fixed order; the
// assembly blitters in this file read them at offsets 0, 4, 8 and 12.
struct OffsetInfo
{
	long srcOffsetAtoB;		// source: from the right edge of rect A to the left edge of rect B
	long srcOffsetBtoA;		// source: from the right edge of rect B to the left edge of rect A, in the next row
	long dstOffsetAtoB;		// destination: same hop as srcOffsetAtoB
	long dstOffsetBtoA;		// destination: same hop as srcOffsetBtoA
};
typedef struct OffsetInfo OffsetInfo;
typedef struct OffsetInfo *OffsetInfoPtr;
// Forward declaration of the row blitter shared by all DrawProcs in this file.
// One of several implementations further down is selected at compile time
// (THEORY, USE_C, or the 68k / PowerPC assembly versions).
//   src, dst   - address of the first byte of rect A in source / destination
//   rows       - number of rows to copy
//   bytesA     - bytes per row belonging to rect A
//   bytesB     - bytes per row belonging to rect B
//   info       - pointer jumps to apply after rect A and after rect B
static void BlitDoubleRects(
char *src,
char *dst,
unsigned long rows,
unsigned long bytesA,
unsigned long bytesB,
OffsetInfoPtr info
);
//--------------------------------------------------------------------------------------
#pragma mark [Macros]
// Note: this version of CLIP_RECT is different from the BP_CLIP_RECT in BlitPixie.h!
// This version is designed to handle DoubleRect DrawProcs.
//
// CLIP_RECT(clip, src, dst, interlaced)
//   clip        rect the destination must stay inside (read only)
//   src, dst    rect lvalues that are trimmed together, edge by edge
//   interlaced  when non-zero, dst is forced to start on an even row relative to clip
//
// The macro expands to plain statements and is invoked without a trailing semicolon.
// It executes "return;" in the calling function when no rows are left to draw.
#define CLIP_RECT(r, r1, r2, interlaced) \
	/* left edge: never write in front of the clip rect */ \
	if ((r2).left < (r).left) { \
		(r1).left += (r).left - (r2).left; \
		(r2).left = (r).left; \
	} \
	/* right edge */ \
	if ((r2).right > (r).right) { \
		(r1).right -= (r2).right - (r).right; \
		(r2).right = (r).right; \
	} \
	/* top edge: so we don't write into random memory */ \
	if ((r2).top < (r).top) { \
		(r1).top += (r).top - (r2).top; \
		(r2).top = (r).top; \
	} \
	/* bottom edge */ \
	if ((r2).bottom > (r).bottom) { \
		(r1).bottom -= (r2).bottom - (r).bottom; \
		(r2).bottom = (r).bottom; \
	} \
	/* interlaced drawing starts on an even row: drop an odd first row */ \
	if ((interlaced) && (((r2).top - (r).top) & 1)) { \
		(r1).top++; \
		(r2).top++; \
	} \
	/* no rows left: the calling function has nothing to draw */ \
	if ((r2).bottom <= (r2).top) \
		return; \
	/* a negative width is collapsed to zero instead of returning, */ \
	/* so that the other rect of the pair can still be drawn */ \
	if ((r2).right < (r2).left) { \
		(r1).right = (r1).left; \
		(r2).right = (r2).left; \
	}
extern SInt8 gSWmmuMode;
///--------------------------------------------------------------------------------------
// BlitPixie8BitDoubleRectDrawProc
///--------------------------------------------------------------------------------------
// Copies two source rects (A and B) of an 8-bit frame to two destination rects
// in a single pass: each row of A is followed by the matching row of B.
// Both rect pairs are first clipped against dstFrameP->frameRect; the caller's
// rects are never modified. Returns without drawing when clipping leaves no rows.
SW_FUNC void BlitPixie8BitDoubleRectDrawProc(
	FramePtr srcFrameP,
	FramePtr dstFrameP,
	Rect* srcRectA,
	Rect* dstRectA,
	Rect* srcRectB,
	Rect* dstRectB)
{
	Rect			srcA, dstA, srcB, dstB;		// private, clipped copies of the four rects
	OffsetInfo		jumps;
	char			*srcStart, *dstStart;
	unsigned long	rowCount, widthA, widthB;

	srcA = *srcRectA;
	dstA = *dstRectA;
	srcB = *srcRectB;
	dstB = *dstRectB;

	SW_ASSERT(srcFrameP->isFrameLocked && dstFrameP->isFrameLocked);
	SW_ASSERT((*srcFrameP->framePort->portPixMap)->pixelSize == 8);
	SW_ASSERT((*dstFrameP->framePort->portPixMap)->pixelSize == 8);

	// either of these may return from this function
	CLIP_RECT(dstFrameP->frameRect, srcA, dstA, false)
	CLIP_RECT(dstFrameP->frameRect, srcB, dstB, false)

	// at 8 bits a pixel is a byte, so pixel distances are used as byte distances
	jumps.srcOffsetAtoB = srcB.left - srcA.right;
	jumps.dstOffsetAtoB = dstB.left - dstA.right;
	// from the end of B back to the start of A, one row further down
	jumps.srcOffsetBtoA = (srcA.left - srcB.right) +
		srcFrameP->frameRowBytes;
	jumps.dstOffsetBtoA = (dstA.left - dstB.right) +
		dstFrameP->frameRowBytes;

	START_32_BIT_MODE

	// first byte of rect A in the source (row index is relative to the frame's top)
	srcStart = (srcFrameP->frameBaseAddr +
		(srcFrameP->scanLinePtrArray[srcA.top - srcFrameP->frameRect.top]) +
		srcA.left);

	// first byte of rect A in the destination
	dstStart = (dstFrameP->frameBaseAddr +
		(dstFrameP->scanLinePtrArray[dstA.top]) +
		dstA.left);

	// the row count is taken from rect A
	rowCount = dstA.bottom - dstA.top;
	widthA = dstA.right - dstA.left;
	widthB = dstB.right - dstB.left;

	BlitDoubleRects(srcStart, dstStart, rowCount, widthA, widthB, &jumps);

	END_32_BIT_MODE
}
///--------------------------------------------------------------------------------------
// BP8BitInterlacedDoubleRectDrawProc
///--------------------------------------------------------------------------------------
// Interlaced variant of the 8-bit double-rect DrawProc: only every second row
// is copied, starting on an even row relative to the destination frame's top.
// Both rect pairs are clipped against dstFrameP->frameRect first; the caller's
// rects are never modified. Returns without drawing when no rows are left.
SW_FUNC void BP8BitInterlacedDoubleRectDrawProc(
	FramePtr srcFrameP,
	FramePtr dstFrameP,
	Rect* srcRectA,
	Rect* dstRectA,
	Rect* srcRectB,
	Rect* dstRectB)
{
	Rect			srcA, dstA, srcB, dstB;		// private, clipped copies of the four rects
	OffsetInfo		jumps;
	char			*srcStart, *dstStart;
	unsigned long	widthA, widthB;
	int				rowCount;

	srcA = *srcRectA;
	dstA = *dstRectA;
	srcB = *srcRectB;
	dstB = *dstRectB;

	SW_ASSERT(srcFrameP->isFrameLocked && dstFrameP->isFrameLocked);
	SW_ASSERT((*srcFrameP->framePort->portPixMap)->pixelSize == 8);
	SW_ASSERT((*dstFrameP->framePort->portPixMap)->pixelSize == 8);

	// either of these may return from this function
	CLIP_RECT(dstFrameP->frameRect, srcA, dstA, true)
	CLIP_RECT(dstFrameP->frameRect, srcB, dstB, true)

	// every second row is drawn: half the height, rounded up
	rowCount = dstA.bottom - dstA.top;
	rowCount = (rowCount & 1) ? (rowCount >> 1) + 1 : (rowCount >> 1);
	if (rowCount < 1)
		return;

	// at 8 bits a pixel is a byte, so pixel distances are used as byte distances
	jumps.srcOffsetAtoB = srcB.left - srcA.right;
	jumps.dstOffsetAtoB = dstB.left - dstA.right;
	// from the end of B back to the start of A, two rows further down
	jumps.srcOffsetBtoA = (srcA.left - srcB.right) +
		(srcFrameP->frameRowBytes << 1);
	jumps.dstOffsetBtoA = (dstA.left - dstB.right) +
		(dstFrameP->frameRowBytes << 1);

	START_32_BIT_MODE

	// first byte of rect A in the source (row index is relative to the frame's top)
	srcStart = (srcFrameP->frameBaseAddr +
		(srcFrameP->scanLinePtrArray[srcA.top - srcFrameP->frameRect.top]) +
		srcA.left);

	// first byte of rect A in the destination
	dstStart = (dstFrameP->frameBaseAddr +
		(dstFrameP->scanLinePtrArray[dstA.top]) +
		dstA.left);

	widthA = SW_MAX(dstA.right - dstA.left,0);
	widthB = SW_MAX(dstB.right - dstB.left,0);

	BlitDoubleRects(srcStart, dstStart, rowCount, widthA, widthB, &jumps);

	END_32_BIT_MODE
}
///--------------------------------------------------------------------------------------
// BlitPixie16BitDoubleRectDrawProc
///--------------------------------------------------------------------------------------
// Copies two source rects (A and B) of a 16-bit frame to two destination rects
// in a single pass: each row of A is followed by the matching row of B.
// Horizontal pixel quantities are doubled (<< 1) to get byte quantities.
// Both rect pairs are first clipped against dstFrameP->frameRect; the caller's
// rects are never modified. Returns without drawing when clipping leaves no rows.
SW_FUNC void BlitPixie16BitDoubleRectDrawProc(
	FramePtr srcFrameP,
	FramePtr dstFrameP,
	Rect* srcRectA,
	Rect* dstRectA,
	Rect* srcRectB,
	Rect* dstRectB)
{
	Rect			srcA, dstA, srcB, dstB;		// private, clipped copies of the four rects
	OffsetInfo		jumps;
	char			*srcStart, *dstStart;
	unsigned long	rowCount, widthA, widthB;

	srcA = *srcRectA;
	dstA = *dstRectA;
	srcB = *srcRectB;
	dstB = *dstRectB;

	SW_ASSERT(srcFrameP->isFrameLocked && dstFrameP->isFrameLocked);
	SW_ASSERT((*srcFrameP->framePort->portPixMap)->pixelSize == 16);
	SW_ASSERT((*dstFrameP->framePort->portPixMap)->pixelSize == 16);

	// either of these may return from this function
	CLIP_RECT(dstFrameP->frameRect, srcA, dstA, false)
	CLIP_RECT(dstFrameP->frameRect, srcB, dstB, false)

	// two bytes per pixel: pixel distances are doubled
	jumps.srcOffsetAtoB = (srcB.left - srcA.right) << 1;
	jumps.dstOffsetAtoB = (dstB.left - dstA.right) << 1;
	// from the end of B back to the start of A, one row further down
	jumps.srcOffsetBtoA = ((srcA.left - srcB.right) << 1 ) +
		srcFrameP->frameRowBytes;
	jumps.dstOffsetBtoA = ((dstA.left - dstB.right) << 1) +
		dstFrameP->frameRowBytes;

	START_32_BIT_MODE

	// first byte of rect A in the source (row index is relative to the frame's top)
	srcStart = (srcFrameP->frameBaseAddr +
		(srcFrameP->scanLinePtrArray[srcA.top - srcFrameP->frameRect.top]) +
		(srcA.left << 1));

	// first byte of rect A in the destination
	dstStart = (dstFrameP->frameBaseAddr +
		(dstFrameP->scanLinePtrArray[dstA.top]) +
		(dstA.left << 1));

	// the row count is taken from rect A; widths are passed in bytes
	rowCount = dstA.bottom - dstA.top;
	widthA = SW_MAX(dstA.right - dstA.left,0) << 1;
	widthB = SW_MAX(dstB.right - dstB.left,0) << 1;

	BlitDoubleRects(srcStart, dstStart, rowCount, widthA, widthB, &jumps);

	END_32_BIT_MODE
}
///--------------------------------------------------------------------------------------
// BP16BitInterlacedDoubleRectDrawProc
///--------------------------------------------------------------------------------------
// Interlaced variant of the 16-bit double-rect DrawProc: only every second row
// is copied, starting on an even row relative to the destination frame's top.
// Horizontal pixel quantities are doubled (<< 1) to get byte quantities.
// Both rect pairs are clipped against dstFrameP->frameRect first; the caller's
// rects are never modified. Returns without drawing when no rows are left.
SW_FUNC void BP16BitInterlacedDoubleRectDrawProc(
	FramePtr srcFrameP,
	FramePtr dstFrameP,
	Rect* srcRectA,
	Rect* dstRectA,
	Rect* srcRectB,
	Rect* dstRectB)
{
	Rect			srcA, dstA, srcB, dstB;		// private, clipped copies of the four rects
	OffsetInfo		jumps;
	char			*srcStart, *dstStart;
	unsigned long	widthA, widthB;
	int				rowCount;

	srcA = *srcRectA;
	dstA = *dstRectA;
	srcB = *srcRectB;
	dstB = *dstRectB;

	SW_ASSERT(srcFrameP->isFrameLocked && dstFrameP->isFrameLocked);
	SW_ASSERT((*srcFrameP->framePort->portPixMap)->pixelSize == 16);
	SW_ASSERT((*dstFrameP->framePort->portPixMap)->pixelSize == 16);

	// either of these may return from this function
	CLIP_RECT(dstFrameP->frameRect, srcA, dstA, true)
	CLIP_RECT(dstFrameP->frameRect, srcB, dstB, true)

	// every second row is drawn: half the height, rounded up
	rowCount = dstA.bottom - dstA.top;
	rowCount = (rowCount & 1) ? (rowCount >> 1) + 1 : (rowCount >> 1);
	if (rowCount < 1)
		return;

	// two bytes per pixel: pixel distances are doubled
	jumps.srcOffsetAtoB = (srcB.left - srcA.right) << 1;
	jumps.dstOffsetAtoB = (dstB.left - dstA.right) << 1;
	// from the end of B back to the start of A, two rows further down
	jumps.srcOffsetBtoA = ((srcA.left - srcB.right) << 1 ) +
		(srcFrameP->frameRowBytes << 1);
	jumps.dstOffsetBtoA = ((dstA.left - dstB.right) << 1 ) +
		(dstFrameP->frameRowBytes << 1);

	START_32_BIT_MODE

	// first byte of rect A in the source (row index is relative to the frame's top)
	srcStart = (srcFrameP->frameBaseAddr +
		(srcFrameP->scanLinePtrArray[srcA.top - srcFrameP->frameRect.top]) +
		(srcA.left << 1));

	// first byte of rect A in the destination
	dstStart = (dstFrameP->frameBaseAddr +
		(dstFrameP->scanLinePtrArray[dstA.top]) +
		(dstA.left << 1));

	// widths are passed in bytes
	widthA = SW_MAX(dstA.right - dstA.left,0) << 1;
	widthB = SW_MAX(dstB.right - dstB.left,0) << 1;

	BlitDoubleRects(srcStart, dstStart, rowCount, widthA, widthB, &jumps);

	END_32_BIT_MODE
}
#pragma mark -
///--------------------------------------------------------------------------------------
// BlitDoubleRects
//
// a blitter to merge two offscreen areas into one onscreen area
// NOTE: This implementation _always_ blits aligned doubles to the screen
///--------------------------------------------------------------------------------------
#pragma mark *** Theory:
#ifdef THEORY
#include <Memory.h>
#if SW_PPC
#define BlockMoveFunction BlockMoveDataUncached
// BlockMoveDataUncached is implemented in "DriverServicesLib" (strangely enough).
// Even more strangely, DriverServicesLib is only available on PCI-based Macs -
// making it kinda hard to run on others. Just include this library in your project,
// or go with BlockMoveData/memcpy (and suffer the consequences). -- AFB
//
// "the difference between theory and practice is greater in practice than in theory"
#else
#define BlockMoveFunction BlockMoveData
#endif
// "Theoretical" reference blitter: lets BlockMoveFunction do all the copying.
// For each of the `rows` rows it copies bytesA bytes (rect A), applies the
// AtoB jumps from `info`, copies bytesB bytes (rect B) and applies the BtoA
// jumps, which lead to rect A of the next row to be drawn.
//   src, dst   - first byte of rect A in source / destination
//   rows       - number of rows to copy (0 copies nothing)
//   bytesA/B   - bytes per row in rect A / rect B
//   info       - the four pointer jumps
void BlitDoubleRects(
	char *src,
	char *dst,
	unsigned long rows,
	unsigned long bytesA,
	unsigned long bytesB,
	OffsetInfoPtr info)
{
	// same type as `rows`: a signed int counter would be compared against an
	// unsigned long and could not count high enough where int is 16 bits
	unsigned long y;

	for ( y = 0; y < rows; y++ )
	{
		// rect A
		BlockMoveFunction( src, dst , bytesA );
		src += bytesA;
		dst += bytesA;

		// hop from the end of A to the start of B
		src += info->srcOffsetAtoB;
		dst += info->dstOffsetAtoB;

		// rect B
		BlockMoveFunction( src, dst , bytesB );
		src += bytesB;
		dst += bytesB;

		// hop from the end of B to the start of A in the next row
		src += info->srcOffsetBtoA;
		dst += info->dstOffsetBtoA;
	}
}
#pragma mark *** C (optimized):
#elif defined(USE_C)
#if THINK_C
// NOTE: This code will not compile on THINK C, because it doesn't have decent pointers.
// Use a real compiler instead, or switch back to the assembly. Thank you.
#error
#endif
// This implementation is
// ©1998 Anders Fredrik Björklund. All rights reserved.
//
// C version of the double-rect blitter. For every row it copies bytesA bytes
// (rect A), applies the AtoB jumps, copies bytesB bytes (rect B) and applies
// the BtoA jumps. Each rect is copied in three steps: a few single units that
// bring the destination up to an aligned address, whole 32-byte blocks, and
// finally the left-over (<32) bytes.
//
// The alignment is worked out once, from the addresses of the first row, and
// reused for every row.
//
// NOTE: a 32-byte block is moved as 8 longs or 4 doubles, i.e. the code expects
// sizeof(long) == 4 and sizeof(double) == 8. Source reads are type-punned and
// may be unaligned, as in the original code.
#define srcL ((long *) src)
#define dstL ((long *) dst)
#define bufferL ((long *) buffer)
#define srcD ((double *) src)
#define dstD ((double *) dst)
#define bufferD ((double *) buffer)
#if SW_PPC
#define kAlignmentMask 7
#else
#define kAlignmentMask 3
#endif
// Move one object of `type` from src to dst and advance both byte pointers past it.
// (Standard C replacement for the "*((type *)dst)++ = *((type *)src)++" idiom, which
// relies on the cast-as-lvalue compiler extension.)
#define COPY_UNIT(type) \
	do { \
		*((type *) dst) = *((type *) src); \
		src += sizeof(type); \
		dst += sizeof(type); \
	} while (0)
//   src, dst   - first byte of rect A in source / destination
//   rows       - number of rows to copy (0 copies nothing)
//   bytesA/B   - bytes per row in rect A / rect B
//   info       - the four pointer jumps
void BlitDoubleRects(
	char *src,
	char *dst,
	unsigned long rows,
	unsigned long bytesA,
	unsigned long bytesB,
	OffsetInfoPtr info)
{
	unsigned int x;
	unsigned int leftblocks,leftwords,left;
	unsigned int rightblocks,rightwords,right;
	unsigned int alignA,alignB;
	unsigned long widthA;
	long srcOffsetBtoA,dstOffsetBtoA,srcOffsetAtoB,dstOffsetAtoB;
#if SW_PPC
	char *buffer;
	Boolean useDoublesA,useDoublesB;
	char DoubleBuffer[64];
#endif

	// Nothing to draw. This also protects the do/while row loops below: their
	// "--rows" test would wrap around if they were entered with rows == 0.
	// (Both assembly versions of this function test for zero rows as well.)
	if (rows == 0)
		return;

	// load stuff from struct
	srcOffsetAtoB = info->srcOffsetAtoB;
	dstOffsetAtoB = info->dstOffsetAtoB;
	srcOffsetBtoA = info->srcOffsetBtoA;
	dstOffsetBtoA = info->dstOffsetBtoA;

	// Rect B starts widthA bytes (plus the AtoB jump) after the start of rect A,
	// no matter how rect A is split into alignment bytes and body below.
	widthA = bytesA;

	// alignment offset for rect A
	alignA = (-((long) dst )) & kAlignmentMask;
	if ( alignA > bytesA) alignA = bytesA;
	bytesA -= alignA;

	// alignment offset for rect B (measured from the real start of rect B)
	alignB = (-((long) dst + widthA + dstOffsetAtoB)) & kAlignmentMask;
	if ( alignB > bytesB) alignB = bytesB;
	bytesB -= alignB;

	//pre-calculate transfer sizes
	leftblocks = bytesA >> 5;
	left = bytesA & 31;
	rightblocks = bytesB >> 5;
	right = bytesB & 31;

#if SW_PPC
	// alignment for source (can use doubles if word-aligned):
	// after the alignment bytes have been copied, the source is word-aligned
	// exactly when it was as far from a word boundary as the destination
	useDoublesA = (alignA & 3) == ((-((long) src )) & 3);
	useDoublesB = (alignB & 3) == ((-((long) src + widthA + srcOffsetAtoB)) & 3);

	// align buffer to 32-byte boundary (cache line)
	buffer = (char *) (( (long) DoubleBuffer + 32) & ~31L);

// Copy 32-byte blocks from a source that is not word-aligned: read 8 longs,
// bounce them through the aligned buffer and store them as 4 doubles.
#define COPY_BLOCKS_LONG(blocks) \
	for ( x = 0; x < (blocks); x++) \
	{	register long t1,t2,t3,t4; \
		register double f1,f2,f3,f4; \
		t1 = srcL[0]; t2 = srcL[1]; t3 = srcL[2]; t4 = srcL[3]; \
		bufferL[0] = t1; bufferL[1] = t2; bufferL[2] = t3; bufferL[3] = t4; \
		t1 = srcL[4]; t2 = srcL[5]; t3 = srcL[6]; t4 = srcL[7]; src += 8 * sizeof(long); \
		f1 = bufferD[0]; f2 = bufferD[1]; \
		bufferL[4] = t1; bufferL[5] = t2; bufferL[6] = t3; bufferL[7] = t4; \
		f3 = bufferD[2]; f4 = bufferD[3]; \
		dstD[0] = f1; dstD[1] = f2; dstD[2] = f3; dstD[3] = f4; dst += 4 * sizeof(double); \
	}

// Copy the last (<32) bytes as longs, then a short, then a byte.
#define COPY_LEFTOVER_LONG(longs,bytes) \
	{ \
		for ( x = 0; x < (longs); x++) COPY_UNIT(long); \
		if ( (bytes) & 2) COPY_UNIT(short); \
		if ( (bytes) & 1) COPY_UNIT(char); \
	}

// Copy 32-byte blocks as 4 doubles each.
#define COPY_BLOCKS_DOUBLE(blocks) \
	for ( x = 0; x < (blocks); x++) \
	{	register double t1,t2,t3,t4; \
		t1 = srcD[0]; t2 = srcD[1]; t3 = srcD[2]; t4 = srcD[3]; src += 4 * sizeof(double); \
		dstD[0] = t1; dstD[1] = t2; dstD[2] = t3; dstD[3] = t4; dst += 4 * sizeof(double); \
	}

// Copy the last (<32) bytes as doubles, then a long, a short and a byte.
#define COPY_LEFTOVER_DOUBLE(doubles,bytes) \
	{ \
		for ( x = 0; x < (doubles); x++) COPY_UNIT(double); \
		if ( (bytes) & 4) COPY_UNIT(long); \
		if ( (bytes) & 2) COPY_UNIT(short); \
		if ( (bytes) & 1) COPY_UNIT(char); \
	}

// Copy the 0..7 bytes in front of the aligned part. Smallest unit first, so that
// the short and the long are themselves stored at aligned destination addresses
// (the same order the PowerPC assembly version uses).
#define COPY_ALIGN(align) \
	{ \
		if ( (align) & 1) COPY_UNIT(char); \
		if ( (align) & 2) COPY_UNIT(short); \
		if ( (align) & 4) COPY_UNIT(long); \
	}

	// ----------------------------------------------------------------------------
	if ( useDoublesA && useDoublesB ) // Both rects aligned
	{
		leftwords = left >> 3;
		rightwords = right >> 3;
		do
		{
			COPY_ALIGN(alignA)
			COPY_BLOCKS_DOUBLE(leftblocks)
			COPY_LEFTOVER_DOUBLE(leftwords,left)
			src += srcOffsetAtoB;
			dst += dstOffsetAtoB;
			COPY_ALIGN(alignB)
			COPY_BLOCKS_DOUBLE(rightblocks)
			COPY_LEFTOVER_DOUBLE(rightwords,right)
			src += srcOffsetBtoA;
			dst += dstOffsetBtoA;
		}
		while (--rows);
	}
	else if ( useDoublesA ) // Left rect aligned
	{
		leftwords = left >> 3;
		rightwords = right >> 2;
		do
		{
			COPY_ALIGN(alignA)
			COPY_BLOCKS_DOUBLE(leftblocks)
			COPY_LEFTOVER_DOUBLE(leftwords,left)
			src += srcOffsetAtoB;
			dst += dstOffsetAtoB;
			COPY_ALIGN(alignB)
			COPY_BLOCKS_LONG(rightblocks)
			COPY_LEFTOVER_LONG(rightwords,right)
			src += srcOffsetBtoA;
			dst += dstOffsetBtoA;
		}
		while (--rows);
	}
	else if ( useDoublesB ) // Right rect aligned
	{
		leftwords = left >> 2;
		rightwords = right >> 3;
		do
		{
			COPY_ALIGN(alignA)
			COPY_BLOCKS_LONG(leftblocks)
			COPY_LEFTOVER_LONG(leftwords,left)
			src += srcOffsetAtoB;
			dst += dstOffsetAtoB;
			COPY_ALIGN(alignB)
			COPY_BLOCKS_DOUBLE(rightblocks)
			COPY_LEFTOVER_DOUBLE(rightwords,right)
			src += srcOffsetBtoA;
			dst += dstOffsetBtoA;
		}
		while (--rows);
	}
	else // None of the rects aligned
	{
		leftwords = left >> 2;
		rightwords = right >> 2;
		do
		{
			COPY_ALIGN(alignA)
			COPY_BLOCKS_LONG(leftblocks)
			COPY_LEFTOVER_LONG(leftwords,left)
			src += srcOffsetAtoB;
			dst += dstOffsetAtoB;
			COPY_ALIGN(alignB)
			COPY_BLOCKS_LONG(rightblocks)
			COPY_LEFTOVER_LONG(rightwords,right)
			src += srcOffsetBtoA;
			dst += dstOffsetBtoA;
		}
		while (--rows);
	}

#undef COPY_BLOCKS_LONG
#undef COPY_LEFTOVER_LONG
#undef COPY_BLOCKS_DOUBLE
#undef COPY_LEFTOVER_DOUBLE
#undef COPY_ALIGN

#else // ! SW_PPC
	leftwords = left >> 2;
	rightwords = right >> 2;
	do
	{
		// align destination (alignA is 0..3 here)
		if ( alignA & 1)
			COPY_UNIT(char);
		if ( alignA & 2)
			COPY_UNIT(short);

		// copy 32 byte blocks
		for ( x = 0; x < leftblocks; x++)
		{
			COPY_UNIT(long);
			COPY_UNIT(long);
			COPY_UNIT(long);
			COPY_UNIT(long);
			COPY_UNIT(long);
			COPY_UNIT(long);
			COPY_UNIT(long);
			COPY_UNIT(long);
		}

		// copy left-over bytes (<32)
		for ( x = 0; x < leftwords; x++)
			COPY_UNIT(long);
		if ( left & 2)
			COPY_UNIT(short);
		if ( left & 1)
			COPY_UNIT(char);

		src += srcOffsetAtoB;
		dst += dstOffsetAtoB;

		// align destination (alignB is 0..3 here)
		if ( alignB & 1)
			COPY_UNIT(char);
		if ( alignB & 2)
			COPY_UNIT(short);

		// copy 32 byte blocks
		for ( x = 0; x < rightblocks; x++)
		{
			COPY_UNIT(long);
			COPY_UNIT(long);
			COPY_UNIT(long);
			COPY_UNIT(long);
			COPY_UNIT(long);
			COPY_UNIT(long);
			COPY_UNIT(long);
			COPY_UNIT(long);
		}

		// copy left-over bytes (<32)
		for ( x = 0; x < rightwords; x++)
			COPY_UNIT(long);
		if ( right & 2)
			COPY_UNIT(short);
		if ( right & 1)
			COPY_UNIT(char);

		src += srcOffsetBtoA;
		dst += dstOffsetBtoA;
	} while (--rows);
#endif // SW_PPC
}
#undef COPY_UNIT
#undef srcL
#undef dstL
#undef bufferL
#undef srcD
#undef dstD
#undef bufferD
#else // !USE_C == USE_ASM
#pragma mark *** 68k asm:
#if !SW_PPC
// This 680X0 asm implementation is
// ©1997 Anders Fredrik Björklund. All rights reserved.
// mailto:coderonin@geocities.com ¡¡¡ FRONT LINE ASSEMBLER !!!
SW_ASM_FUNC void BlitDoubleRects(
char *src,
char *dst,
unsigned long rows,
unsigned long bytesA,
unsigned long bytesB,
OffsetInfoPtr info)
{
// VARIABLE(S) REGISTER
// *temp* D0
// *temp*,y D1
// bytesA,left D2
// leftblocks D3
// bytesB,right D4
// dst D5
// alignA D6
// alignB D7
// src A0
// dst A1
// startA A2
// startB A3
// offsetinfoptr A4
SW_ASM_BEGIN
#if __MWERKS__
fralloc
#endif
MOVEM.L D3-D7/A2-A4,-(SP)
MOVEA.L src,A0
MOVEA.L dst,A1
MOVE.L bytesA,D2
MOVE.L bytesB,D4
// srcOffsetAtoB = info->srcOffsetAtoB;
// srcOffsetBtoA = info->srcOffsetBtoA;
// dstOffsetAtoB = info->dstOffsetAtoB;
// dstOffsetBtoA = info->dstOffsetBtoA;
MOVEA.L info,A4
#define srcOffsetAtoB (A4)
#define srcOffsetBtoA 4(A4)
#define dstOffsetAtoB 8(A4)
#define dstOffsetBtoA 12(A4)
// alignment offset for rect A
// align = (-((long) dst )) & 3
MOVE.L A1,D6
MOVEQ #3,D1
NEG.L D6
AND.W D1,D6
// alignment offset for rect B
// align = (-((long) dst + bytesA + dstOffsetAtoB)) & 3
MOVE.L A1,D7
ADD.L D2,D7
ADD.L dstOffsetAtoB,D7
NEG.L D7
AND.W D1,D7
// if ( alignA > bytesA) alignA = bytesA;
CMP.W D2,D6
BLE.S @alignAok
MOVE.W D2,D6
@alignAok:
// bytesB -= align;
SUB.W D6,D2
// if ( alignB > bytesB) alignB = bytesB;
CMP.W D4,D7
BLE.S @alignBok
MOVE.W D4,D7
@alignBok:
// bytesB -= align;
SUB.W D7,D4
//pre-calculate transfer sizes
MOVE.W #15,D0
// leftblocks = bytesA >> 6;
// left = bytesA & 63;
MOVE.W D2,D3
LSR.W #6,D3
// calculate words outside blocks
MOVE.W D2,D1
ANDI.W #3,D2
LSR.W #2,D1 // / sizeof(long)
AND.W D0,D1
ADD.W D1,D1 // * sizeof(MOVE.L (A0)+,(A1)+)
LEA @leftloopend,A2
SUBA.L D1,A2
// rightblocks = bytesB >> 6;
// right = bytesB & 63;
MOVE.W D4,D5
LSR.W #6,D5
// calculate words outside blocks
MOVE.W D4,D1
ANDI.W #3,D4
LSR.W #2,D1 // / sizeof(long)
AND.W D0,D1
ADD.W D1,D1 // * sizeof(MOVE.L (A0)+,(A1)+)
LEA @rightloopend,A3
SUBA.L D1,A3
// if (rows) do
// {
MOVE.L rows,D1
TST.W D1
BEQ @end
@rowloop:
MOVE.W D6,D0
ANDI.W #1,D0
BEQ.S @skipalignAbyte
MOVE.B (A0)+,(A1)+
@skipalignAbyte:
MOVE.W D6,D0
ANDI.W #2,D0
BEQ.S @skipalignAword
MOVE.W (A0)+,(A1)+
@skipalignAword:
// copy 64 byte blocks
MOVE.W D3,D0
JMP (A2)
@leftloop:
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
@leftloopend:
DBRA D0,@leftloop
MOVE.W D2,D0
BEQ.S @skipleftbyte
SUBQ.W #2,D0
BMI.S @leftbyte
MOVE.W (A0)+,(A1)+
TST D0
BEQ.S @skipleftbyte
@leftbyte:
MOVE.B (A0)+,(A1)+
@skipleftbyte:
// src += srcOffsetAtoB;
// dst += dstOffsetAtoB;
ADDA.L srcOffsetAtoB,A0
ADDA.L dstOffsetAtoB,A1
// align destination to 4-byte boundary
MOVE.W D7,D0
ANDI.W #1,D0
BEQ.S @skipalignBbyte
MOVE.B (A0)+,(A1)+
@skipalignBbyte:
MOVE.W D7,D0
ANDI.W #2,D0
BEQ.S @skipalignBword
MOVE.W (A0)+,(A1)+
@skipalignBword:
// copy 64 byte blocks
MOVE.W D5,D0
JMP (A3)
@rightloop:
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
MOVE.L (A0)+,(A1)+
@rightloopend:
DBRA D0,@rightloop
MOVE.W D4,D0
BEQ.S @skiprightbyte
SUBQ.W #2,D0
BMI.S @rightbyte
MOVE.W (A0)+,(A1)+
TST D0
BEQ.S @skiprightbyte
@rightbyte:
MOVE.B (A0)+,(A1)+
@skiprightbyte:
// src += srcOffsetBtoA;
// dst += dstOffsetBtoA;
ADDA.L srcOffsetBtoA,A0
ADDA.L dstOffsetBtoA,A1
// } while (--rows);
SUBQ.W #1,D1
BNE @rowloop
@end:
#undef srcOffsetAtoB
#undef srcOffsetBtoA
#undef dstOffsetAtoB
#undef dstOffsetBtoA
MOVEM.L (SP)+,D3-D7/A2-A4
#if __MWERKS__
frfree
#endif
SW_ASM_END
}
#endif //#if !SW_PPC
#pragma mark *** PowerPC asm:
#if SW_PPC
// This PowerPC asm implementation is
// ©1997-98 Anders Fredrik Björklund. All rights reserved.
// mailto:coderonin@geocities.com ¡¡¡ RISC RULEZ !!!
// PowerPC assembly version of the double-rect blitter.
// For every row: copy rect A, add the AtoB jumps to both pointers, copy rect B,
// add the BtoA jumps. Each rect is copied as
//   1. a byte / halfword / word, to bring the destination to an 8-byte boundary,
//   2. 32-byte blocks, stored as 4 doubles,
//   3. the left-over (<32) bytes.
// When the source is word-aligned once the destination is aligned, blocks are
// loaded directly as doubles; otherwise they are loaded as words and bounced
// through a 32-byte aligned buffer below the stack pointer.
// The alignment amounts are computed once, from the addresses of the first row.
//
// Condition register use inside the row loop:
//   cr0  row counter test        cr1  free
//   cr2  alignA bits (GT=4, EQ=2, SO=1)     cr5  alignB bits
//   cr3  leftblocks  (LT set = aligned path, EQ = no blocks)
//   cr4  left        (unaligned path: EQ = nothing left over;
//                     aligned path: LT = doubles left, GT/EQ/SO = 4/2/1 bytes left)
//   cr6  rightblocks, cr7 right  (same encoding as cr3 / cr4)
//
//   src, dst   - first byte of rect A in source / destination
//   rows       - number of rows to copy (nothing is copied unless rows > 0)
//   bytesA/B   - bytes per row in rect A / rect B
//   info       - the four pointer jumps
asm void BlitDoubleRects(
	register char *src,
	register char *dst,
	register unsigned long rows,
	register unsigned long bytesA,
	register unsigned long bytesB,
	register OffsetInfoPtr info)
{
// LOCAL VARIABLES:
#define y r31
#define alignA r30
#define alignB r29
#define leftdoubles r30 // recycled
#define rightdoubles r29 // recycled
#define srcOffsetAtoB r28
#define srcOffsetBtoA r27
#define dstOffsetAtoB r26
#define dstOffsetBtoA r25
#define leftblocks r24
#define left r23
#define rightblocks r22
#define right r21
#define buffer r20
#define offset r19
#define kRegisterSaveStack (13 * 4)
	stmw r19,-kRegisterSaveStack(SP) // save registers on stack (in the "red zone")
// PARAMETERS:
// src r3
// dst r4
#define rows r5
#define bytesA r6
#define bytesB r7
#define info r8
	// srcOffsetAtoB = info->srcOffsetAtoB;
	// srcOffsetBtoA = info->srcOffsetBtoA;
	// dstOffsetAtoB = info->dstOffsetAtoB;
	// dstOffsetBtoA = info->dstOffsetBtoA;
	lwz srcOffsetAtoB,0(info)
	lwz srcOffsetBtoA,4(info)
	lwz dstOffsetAtoB,8(info)
	lwz dstOffsetBtoA,12(info)
	// get a cache-block aligned stack storage for buffer
	addi buffer,SP,-(kRegisterSaveStack + 32) // (still in the red zone)
	rlwinm buffer,buffer,0,0,26
	dcbtst r0,buffer
	// alignment offset for rect A
	// align = (-((long) dst )) & 7
	neg alignA,r4
	rlwinm alignA,alignA,0,29,31
	// if ( alignA > bytesA) alignA = bytesA;
	// bytesA -= alignA;
	cmplw alignA,bytesA
	ble @alignAok
	mr alignA,bytesA
@alignAok:
	sub bytesA,bytesA,alignA
	// r9 = (-((long) src)) & 3 : distance of the source from a word boundary
	neg r9,r3
	rlwinm r9,r9,0,30,31
	// alignment offset for rect B
	// align = (-((long) dst + alignA + bytesA + dstOffsetAtoB)) & 7
	// bytesA has already been reduced by alignA, so alignA is added back to get
	// the real start of rect B (the full width of rect A after its start)
	add r0,bytesA,alignA
	add r0,r0,dstOffsetAtoB
	add alignB,r4,r0
	neg alignB,alignB
	rlwinm alignB,alignB,0,29,31
	// if ( alignB > bytesB) alignB = bytesB;
	// bytesB -= alignB
	cmplw alignB,bytesB
	ble @alignBok
	mr alignB,bytesB
@alignBok:
	sub bytesB,bytesB,alignB
	// r10 = (-((long) src + alignA + bytesA + srcOffsetAtoB)) & 3 :
	// distance of the source start of rect B from a word boundary
	add r0,bytesA,alignA
	add r0,r0,srcOffsetAtoB
	add r10,r3,r0
	neg r10,r10
	rlwinm r10,r10,0,30,31
	// ———————————————————————————————————————————————————————————————————————————
	//pre-calculate transfer sizes
	rlwinm leftblocks,bytesA,27,5,31 // leftblocks = bytesA / 32;
	rlwinm left,bytesA,0,27,31 // left = bytesA % 32;
	rlwinm rightblocks,bytesB,27,5,31 // rightblocks = bytesB / 32;
	rlwinm right,bytesB,0,27,31 // right = bytesB % 32;
	mr. y,rows
	ble @gohome
	mfcr r0 // save CR in r0
	// NOTE : don't use r0 from here and below (except as zero)
	rlwinm alignA,alignA,5*4,9,11
	mtcrf 32,alignA // cr2 = alignA & 7
	rlwinm alignB,alignB,2*4,21,23
	mtcrf 4,alignB // cr5 = alignB & 7
	// shift back, and with 3 (mask bits 30..31): r9 and r10 are "& 3" values, so
	// comparing them with an "& 7" value could never match for alignments 4..7
	rlwinm alignA,alignA,12,30,31
	rlwinm alignB,alignB,24,30,31
	cmplw cr0,alignA,r9 // rect A source word-aligned after alignment?
	cmplw cr1,alignB,r10 // rect B source word-aligned after alignment?
	cmplwi cr3,leftblocks,0
	cmplwi cr4,left,0
	cmplwi cr6,rightblocks,0
	cmplwi cr7,right,0
	bne cr0,@leftNotAligned
	rlwinm. leftdoubles,left,29,30,31 // leftdoubles = left / 8
	rlwinm left,left,3*4,17,19
	creqv 12,12,12 // crset cr3_LT
	mtcrf 8,left // cr4 = left & 7
	crnor 16,2,2 // cr4_LT = leftdoubles > 0
@leftNotAligned:
	bne cr1,@rightNotAligned
	rlwinm. rightdoubles,right,29,30,31 // rightdoubles = right / 8
	rlwinm right,right,0*4,29,31
	creqv 24,24,24 // crset cr6_LT
	mtcrf 1,right // cr7 = right & 7
	crnor 28,2,2 // cr7_LT = rightdoubles > 0
@rightNotAligned:
	// bias both pointers by -32 so the update-form loads/stores (lwzu, lfdu,
	// stfdu with displacement 32) can step through the blocks; every other
	// access below uses displacement 32 to compensate
	li offset,32
	sub r3,r3,offset
	sub r4,r4,offset
@rowloop:
	// ———————————————————————————————————————————————————————————————————————————
	// NOTE: from here on, regs r5-r12 are scratch!
	// NOTE : cr0 is used for rows (y), cr1 is free
	// align destination
	bns cr2,@skipalignAByte
	lbz r5,32(r3)
	addi r3,r3,1
	stb r5,32(r4)
	addi r4,r4,1
@skipalignAByte:
	bne cr2,@skipalignAWord
	lhz r5,32(r3)
	addi r3,r3,2
	sth r5,32(r4)
	addi r4,r4,2
@skipalignAWord:
	bng cr2,@skipalignALong
	lwz r5,32(r3)
	addi r3,r3,4
	stw r5,32(r4)
	addi r4,r4,4
@skipalignALong:
	// copy 32 byte blocks
	blt cr3,@leftAligned
	beq cr3,@skipleft
	mtctr leftblocks
@leftloop:
	// unaligned source: load 8 words, bounce them through the buffer, store 4 doubles
	lwzu r5,32(r3)
	lwz r6,4(r3)
	lwz r7,8(r3)
	lwz r8,12(r3)
	stw r5,0(buffer)
	stw r6,4(buffer)
	stw r7,8(buffer)
	stw r8,12(buffer)
	lwz r9,16(r3)
	lwz r10,20(r3)
	lwz r11,24(r3)
	lwz r12,28(r3)
	lfd fp1,0(buffer)
	lfd fp2,8(buffer)
	stw r9,16(buffer)
	stw r10,20(buffer)
	stw r11,24(buffer)
	stw r12,28(buffer)
	lfd fp3,16(buffer)
	lfd fp4,24(buffer)
	stfdu fp1,32(r4)
	stfd fp2,8(r4)
	stfd fp3,16(r4)
	stfd fp4,24(r4)
	bdnz @leftloop
@skipleft:
	// copy left-over bytes (<32)
	beq cr4,@endleft
	mtxer left
	lswx r5,offset,r3
	add r3,r3,left
	stswx r5,offset,r4
	add r4,r4,left
	b @endleft
@leftAligned:
	beq cr3,@skipleftAligned
	mtctr leftblocks
@leftloopAligned:
	lfdu fp1,32(r3)
	lfd fp2,8(r3)
	lfd fp3,16(r3)
	lfd fp4,24(r3)
	stfdu fp1,32(r4)
	stfd fp2,8(r4)
	stfd fp3,16(r4)
	stfd fp4,24(r4)
	bdnz @leftloopAligned
@skipleftAligned:
	// left-over bytes (<32): doubles, then a word, a halfword and a byte
	bnl cr4,@skipADoubles
	mtctr leftdoubles
@leftloopDouble:
	lfd fp0,32(r3)
	addi r3,r3,8
	stfd fp0,32(r4)
	addi r4,r4,8
	bdnz @leftloopDouble
@skipADoubles:
	bng cr4,@skipALong
	lwz r5,32(r3)
	addi r3,r3,4
	stw r5,32(r4)
	addi r4,r4,4
@skipALong:
	bne cr4,@skipAWord
	lhz r5,32(r3)
	addi r3,r3,2
	sth r5,32(r4)
	addi r4,r4,2
@skipAWord:
	bns cr4,@skipAByte
	lbz r5,32(r3)
	addi r3,r3,1
	stb r5,32(r4)
	addi r4,r4,1
@skipAByte:
@endleft:
	// src += srcOffsetAtoB;
	// dst += dstOffsetAtoB;
	add r3,r3,srcOffsetAtoB
	add r4,r4,dstOffsetAtoB
	// ———————————————————————————————————————————————————————————————————————————
	// align destination
	bns cr5,@skipalignBByte
	lbz r5,32(r3)
	addi r3,r3,1
	stb r5,32(r4)
	addi r4,r4,1
@skipalignBByte:
	bne cr5,@skipalignBWord
	lhz r5,32(r3)
	addi r3,r3,2
	sth r5,32(r4)
	addi r4,r4,2
@skipalignBWord:
	bng cr5,@skipalignBLong
	lwz r5,32(r3)
	addi r3,r3,4
	stw r5,32(r4)
	addi r4,r4,4
@skipalignBLong:
	// copy 32 byte blocks
	blt cr6,@rightAligned
	beq cr6,@skipright
	mtctr rightblocks
@rightloop:
	// unaligned source: load 8 words, bounce them through the buffer, store 4 doubles
	lwzu r5,32(r3)
	lwz r6,4(r3)
	lwz r7,8(r3)
	lwz r8,12(r3)
	stw r5,0(buffer)
	stw r6,4(buffer)
	stw r7,8(buffer)
	stw r8,12(buffer)
	lwz r9,16(r3)
	lwz r10,20(r3)
	lwz r11,24(r3)
	lwz r12,28(r3)
	lfd fp1,0(buffer)
	lfd fp2,8(buffer)
	stw r9,16(buffer)
	stw r10,20(buffer)
	stw r11,24(buffer)
	stw r12,28(buffer)
	lfd fp3,16(buffer)
	lfd fp4,24(buffer)
	stfdu fp1,32(r4)
	stfd fp2,8(r4)
	stfd fp3,16(r4)
	stfd fp4,24(r4)
	bdnz @rightloop
@skipright:
	// copy left-over bytes (<32)
	beq cr7,@endright
	mtxer right
	lswx r5,offset,r3
	add r3,r3,right
	stswx r5,offset,r4
	add r4,r4,right
	b @endright
@rightAligned:
	beq cr6,@skiprightAligned
	mtctr rightblocks
@rightloopAligned:
	lfdu fp1,32(r3)
	lfd fp2,8(r3)
	lfd fp3,16(r3)
	lfd fp4,24(r3)
	stfdu fp1,32(r4)
	stfd fp2,8(r4)
	stfd fp3,16(r4)
	stfd fp4,24(r4)
	bdnz @rightloopAligned
@skiprightAligned:
	// left-over bytes (<32): doubles, then a word, a halfword and a byte
	bnl cr7,@skipBDoubles
	mtctr rightdoubles
@rightloopDouble:
	lfd fp0,32(r3)
	addi r3,r3,8
	stfd fp0,32(r4)
	addi r4,r4,8
	bdnz @rightloopDouble
@skipBDoubles:
	bng cr7,@skipBLong
	lwz r5,32(r3)
	addi r3,r3,4
	stw r5,32(r4)
	addi r4,r4,4
@skipBLong:
	bne cr7,@skipBWord
	lhz r5,32(r3)
	addi r3,r3,2
	sth r5,32(r4)
	addi r4,r4,2
@skipBWord:
	bns cr7,@skipBByte
	lbz r5,32(r3)
	addi r3,r3,1
	stb r5,32(r4)
	addi r4,r4,1
@skipBByte:
@endright:
	// ———————————————————————————————————————————————————————————————————————————
	// } while (--y);  then src += srcOffsetBtoA; dst += dstOffsetBtoA;
	subic. y,y,1
	add r3,r3,srcOffsetBtoA
	add r4,r4,dstOffsetBtoA
	bne @rowloop
@end:
	mtcrf 0xFF,r0 // restore CR (mtcr r0)
@gohome:
#undef y
#undef leftblocks
#undef left
#undef rightblocks
#undef right
#undef alignA
#undef alignB
#undef srcOffsetAtoB
#undef srcOffsetBtoA
#undef dstOffsetAtoB
#undef dstOffsetBtoA
#undef buffer
#undef offset
	lmw r19,-kRegisterSaveStack(SP) // restore registers from stack
#undef rows
#undef bytesA
#undef bytesB
#undef info
	blr
}
#endif //#if SW_PPC
#endif //#if THEORY/USE_C/USE_ASM